In this study_____
library("tidyverse")
library("plyr")
library("dplyr")
library("ggplot2")
library("scales")
library("ggpubr")
library("gridExtra")
library("grid")
library("GGally")
library("vcfR") # for extracting genotype data from a vcf file
library("data.table")
library("stringr")
library("janitor")
library("gmodels")
library("rstatix")
library("freqtables")
library("broom")
library("DescTools") # for the Goodness-of-Fit test, 3 variables; and for Breslow-Day Test for Homogeneity of the Odds Ratios
library("vcd") # for the woolf test testign log odds for each pair
library("patchwork") # for gathering the plots
#library("fuzzyjoin") # to join tables based on a string in a column
#library("aspi") # Repeated G–tests of Goodness-of-Fit, work only for 2 variables..
#library("RVAideMemoire") # Repeated G–tests of Goodness-of-Fit, work only for 2 variables...
#library("InfoTrad")
#library("ggthemes") # for more colors in the ggplot
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE,
fig.width = 8,
fig.asp = 0.6,
out.width = "100%")
#fig.width = 6,fig.asp = 0.8,out.width = "100%"
Extract genotypes for each site and individual
vcf <- read.vcfR("/Users/nuriteliash/Documents/GitHub/varroa-linkage-map/data/vcf_filter/Q40BIALLDP16HDP40mis.5Chr7/Q40BIALLDP16HDP40mis.5Chr7.recode.vcf", verbose = FALSE )
vcf
## ***** Object of Class vcfR *****
## 223 samples
## 7 CHROMs
## 35,169 variants
## Object size: 293.5 Mb
## 0 percent missing data
## ***** ***** *****
# extract the genotype for each site in each individual
gt <- extract.gt(vcf, element = "GT")
gt <- as.data.frame(t(gt)) %>%
rownames_to_column("ID")
#clean the ID names
gt$ID <- sub("_[^_]+$", "", gt$ID)
We keep only F1 and F2 generations, in which we have both adult male and female.
Eventually, we are left with 114 individuals of 26 families, and all 35,169 biallelic sites.
For example, these are the first 6 sites (13565-25513) of the first chromosome (NW_019211454.1), in one family (family number 240).
table <- gt %>%
t() %>%
as.data.frame() %>%
row_to_names(row_number = 1) %>%
dplyr::select(contains(c("son", "dat"))) # keep only adults of F1 and F2
table %>% select(contains(c("240_"))) %>% head()
## 240_240a_son 240_241c_grnson 240_241b_grndat
## NW_019211454.1_13565 0/1 <NA> <NA>
## NW_019211454.1_18037 0/0 <NA> <NA>
## NW_019211454.1_20998 <NA> <NA> <NA>
## NW_019211454.1_24935 <NA> 0/0 <NA>
## NW_019211454.1_25470 <NA> <NA> <NA>
## NW_019211454.1_25513 <NA> <NA> <NA>
## 240_241a_grndat 240_241_dat
## NW_019211454.1_13565 0/1 0/1
## NW_019211454.1_18037 0/0 0/0
## NW_019211454.1_20998 0/0 0/0
## NW_019211454.1_24935 0/0 0/0
## NW_019211454.1_25470 0/0 0/0
## NW_019211454.1_25513 0/0 <NA>
The family include:
Each individual, was genotyped for each site with one of the three genotypes:
In the following code we viewed the F2 offspring genotype of all possible nine crosses between F1 male and females.
Then we cross heterozygotic sites, to explore the mode of genetic inheritance in varroa mite:
For each crossing we have expected genotype-ratio of the F2, based on the following assumptions under arrhenotokous haplodiploid system:
To detect false sites
we expect all F2 offspring to be homozygotic (0/0) like their parents (F1)
x <- dat %>%
dplyr::select(c("sample","gt","sex", "exp","prop", "total")) %>%
mutate(type = "exp") %>%
dplyr::rename(count = exp)
y <- dat %>%
dplyr::select(c("sample","gt","sex", "obs","total")) %>%
mutate(type = "obs") %>%
dplyr::rename(count = obs) %>%
mutate(prop = (count/total))
df_male <- rbind(x, y) %>%
dplyr::filter(sex == "male")
df_fem <- rbind(x, y) %>%
dplyr::filter(sex == "female")
p_male_pooled <- ggplot(df_male, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 males") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Males of F1 male (0/0) x female (0/0)",
subtitle = paste0("Pooled sites = ", df_male %>% dplyr::filter(gt == "0/0") %>% dplyr::filter(type == "obs") %>% summarise(sum(total)) %>% as.numeric())) +
scale_x_discrete("type", labels = c("Expected","Observed")) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
p_female_pooled <- ggplot(df_fem, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 females") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 females of F1 male (0/0) x female (0/0)",
subtitle = paste0("Pooled sites = ", df_fem %>% dplyr::filter(gt == "0/0") %>% dplyr::filter(type == "obs") %>% summarise(sum(total)) %>% as.numeric())) +
scale_x_discrete("type", labels = c("Expected","Observed")) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
grid.arrange(p_female_pooled, p_male_pooled, ncol = 2)
# plot ratio for each individual:
# males:
ggplot(df_male, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 siblings") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Males of F1 male (0/0) x female (0/0)") +
# geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
geom_text(data = filter(df_male, gt == "0/0"), aes(y=prop, x=type, label=total), vjust = 0) +
facet_wrap(~ sample) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
# and females:
ggplot(df_fem, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 siblings") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Females of F1 male (0/0) x female (0/0)") +
# geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
geom_text(data = filter(df_fem, gt == "0/0"), aes(y=prop, x=type, label=total), vjust = 0) +
facet_wrap(~ sample) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
we expect all F2 offspring to be homozygotic (1/1) like their parents (F1)
# set the families (include only families with at least one adult F2)
family = grep("grndat|grnson",gt$ID, value=TRUE) %>%
str_extract("[^_]+") %>%
unique()
# define a list to put all the data frames in
obs <- list()
for (fam in family) {
obs[[fam]] <- table %>%
dplyr::select(starts_with(fam)) %>%
dplyr::filter_at(vars(matches("_son")), all_vars(. == "1/1")) %>%
dplyr::filter_at(vars(matches("_dat")), all_vars(. == "1/1")) %>%
dplyr::select(contains("grn")) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rename(sample = name, gt = value) %>%
dplyr::count(sample, gt, .drop = FALSE) %>%
dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
mutate(n = as.numeric(n)) %>%
group_by(sample) %>%
mutate(total = as.numeric(sum(n))) %>%
dplyr::rename(obs = n) %>%
mutate(sex = case_when(
grepl("son", sample) ~ "male",
grepl("dat", sample) ~ "female"))
}
# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs)
# state the expected site proportion of each genotype and sex under a perfect mendelian segregation of F2
fem_exp <- data.frame(sex = "female", gt = c("0/0", "0/1", "1/1") , prop = c(0,0,1))
male_exp <- data.frame(sex = "male",gt = c("0/0", "0/1", "1/1"),prop = c(0,0,1))
mendel <- bind_rows(fem_exp,male_exp) %>% mutate(sex = as.character(sex))%>% mutate(gt = as.character(gt))
# mutate(type = "exp")
# using on the total genotypes count of sites per sample, calculate the expected counts, and add them to the observed counts into a final dat table
dat <- observed %>%
#mutate(sample = as.character(sample)) %>%
mutate(gt = as.character(gt)) %>%
left_join(mendel, by = c("gt", "sex")) %>%
mutate(exp = total*prop)
x <- dat %>%
dplyr::select(c("sample","gt","sex", "exp","prop", "total")) %>%
mutate(type = "exp") %>%
dplyr::rename(count = exp)
y <- dat %>%
dplyr::select(c("sample","gt","sex", "obs","total")) %>%
mutate(type = "obs") %>%
dplyr::rename(count = obs) %>%
mutate(prop = (count/total))
df_male <- rbind(x, y) %>%
dplyr::filter(sex == "male")
df_fem <- rbind(x, y) %>%
dplyr::filter(sex == "female")
p_male_pooled <- ggplot(df_male, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 males") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Males of F1 male (1/1) x female (1/1)",
subtitle = paste0("Pooled sites = ", df_male %>% dplyr::filter(gt == "0/0") %>% dplyr::filter(type == "obs") %>% summarise(sum(total)) %>% as.numeric())) +
scale_x_discrete("type", labels = c("Expected","Observed")) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
p_female_pooled <- ggplot(df_fem, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 females") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 females of F1 male (1/1) x female (1/1)",
subtitle = paste0("Pooled sites = ", df_fem %>% dplyr::filter(gt == "0/0") %>% dplyr::filter(type == "obs") %>% summarise(sum(total)) %>% as.numeric())) +
scale_x_discrete("type", labels = c("Expected","Observed")) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
grid.arrange(p_female_pooled, p_male_pooled, ncol = 2)
ggplot(df_male, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 siblings") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Males of F1 male (1/1) x female (1/1)") +
# geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
geom_text(data = filter(df_male, gt == "0/0"), aes(y=prop, x=type, label=total), vjust = 0) +
facet_wrap(~ sample) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
ggplot(df_fem, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 siblings") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Females of F1 male (1/1) x female (1/1)") +
# geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
geom_text(data = filter(df_fem, gt == "0/0"), aes(y=prop, x=type, label=total), vjust = 0) +
facet_wrap(~ sample) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
We expect no sites for this cross, since there is not paternal inheritance to the males
# set the families (include only families with at least one adult F2)
family = grep("grndat|grnson",gt$ID, value=TRUE) %>%
str_extract("[^_]+") %>%
unique()
# define a list to put all the data frames in
obs <- list()
for (fam in family) {
obs[[fam]] <- table %>%
dplyr::select(starts_with(fam)) %>%
dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/0")) %>%
dplyr::filter_at(vars(matches("_dat")), all_vars(. == "1/1")) %>%
dplyr::select(contains("grn")) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rename(sample = name, gt = value) %>%
dplyr::count(sample, gt, .drop = FALSE) %>%
dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
mutate(n = as.numeric(n)) %>%
group_by(sample) %>%
mutate(total = as.numeric(sum(n))) %>%
dplyr::rename(obs = n) %>%
mutate(sex = case_when(
grepl("son", sample) ~ "male",
grepl("dat", sample) ~ "female"))
}
# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs)
# state the expected site proportion of each genotype and sex under a perfect mendelian segregation of F2
fem_exp <- data.frame(sex = "female", gt = c("0/0", "0/1", "1/1") , prop = c(0,0,0))
male_exp <- data.frame(sex = "male",gt = c("0/0", "0/1", "1/1"),prop = c(0,0,0))
mendel <- bind_rows(fem_exp,male_exp) %>% mutate(sex = as.character(sex))%>% mutate(gt = as.character(gt))
# mutate(type = "exp")
# using on the total genotypes count of sites per sample, calculate the expected counts, and add them to the observed counts into a final dat table
dat <- observed %>%
#mutate(sample = as.character(sample)) %>%
mutate(gt = as.character(gt)) %>%
left_join(mendel, by = c("gt", "sex")) %>%
mutate(exp = total*prop)
x <- dat %>%
dplyr::select(c("sample","gt","sex", "exp","prop", "total")) %>%
mutate(type = "exp") %>%
dplyr::rename(count = exp)
y <- dat %>%
dplyr::select(c("sample","gt","sex", "obs","total")) %>%
mutate(type = "obs") %>%
dplyr::rename(count = obs) %>%
mutate(prop = (count/total))
df_male <- rbind(x, y) %>%
dplyr::filter(sex == "male")
df_fem <- rbind(x, y) %>%
dplyr::filter(sex == "female")
p_male_pooled <- ggplot(df_male, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 males") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Males of F1 male (0/0) x female (1/1)",
subtitle = paste0("Pooled sites = ", df_male %>% dplyr::filter(gt == "0/0") %>% dplyr::filter(type == "obs") %>% summarise(sum(total)) %>% as.numeric())) +
scale_x_discrete("type", labels = c("Expected","Observed")) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
p_female_pooled <- ggplot(df_fem, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 females") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 females of F1 male (0/0) x female (1/1)",
subtitle = paste0("Pooled sites = ", df_fem %>% dplyr::filter(gt == "0/0") %>% dplyr::filter(type == "obs") %>% summarise(sum(total)) %>% as.numeric())) +
scale_x_discrete("type", labels = c("Expected","Observed")) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
grid.arrange(p_female_pooled, p_male_pooled, ncol = 2)
ggplot(df_male, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 siblings") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Males of F1 male (0/0) x female (1/1)") +
# geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
geom_text(data = filter(df_male, gt == "0/0"), aes(y=prop, x=type, label=total), vjust = 0) +
facet_wrap(~ sample) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
ggplot(df_fem, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 siblings") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Females of F1 male (0/0) x female (1/1)") +
# geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
geom_text(data = filter(df_fem, gt == "0/0"), aes(y=prop, x=type, label=total), vjust = 0) +
facet_wrap(~ sample) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
We expect no sites for this cross, sinse there is not paternal inheritance to the males
# set the families (include only families with at least one adult F2)
family = grep("grndat|grnson",gt$ID, value=TRUE) %>%
str_extract("[^_]+") %>%
unique()
# define a list to put all the data frames in
obs <- list()
for (fam in family) {
obs[[fam]] <- table %>%
dplyr::select(starts_with(fam)) %>%
dplyr::filter_at(vars(matches("_son")), all_vars(. == "1/1")) %>%
dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/0")) %>%
dplyr::select(contains("grn")) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rename(sample = name, gt = value) %>%
dplyr::count(sample, gt, .drop = FALSE) %>%
dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
mutate(n = as.numeric(n)) %>%
group_by(sample) %>%
mutate(total = as.numeric(sum(n))) %>%
dplyr::rename(obs = n) %>%
mutate(sex = case_when(
grepl("son", sample) ~ "male",
grepl("dat", sample) ~ "female"))
}
# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs)
# state the expected site proportion of each genotype and sex under a perfect mendelian segregation of F2
fem_exp <- data.frame(sex = "female", gt = c("0/0", "0/1", "1/1") , prop = c(0,0,0))
male_exp <- data.frame(sex = "male",gt = c("0/0", "0/1", "1/1"),prop = c(0,0,0))
mendel <- bind_rows(fem_exp,male_exp) %>% mutate(sex = as.character(sex))%>% mutate(gt = as.character(gt))
# mutate(type = "exp")
# using on the total genotypes count of sites per sample, calculate the expected counts, and add them to the observed counts into a final dat table
dat <- observed %>%
#mutate(sample = as.character(sample)) %>%
mutate(gt = as.character(gt)) %>%
left_join(mendel, by = c("gt", "sex")) %>%
mutate(exp = total*prop)
x <- dat %>%
dplyr::select(c("sample","gt","sex", "exp","prop", "total")) %>%
mutate(type = "exp") %>%
dplyr::rename(count = exp)
y <- dat %>%
dplyr::select(c("sample","gt","sex", "obs","total")) %>%
mutate(type = "obs") %>%
dplyr::rename(count = obs) %>%
mutate(prop = (count/total))
df_male <- rbind(x, y) %>%
dplyr::filter(sex == "male")
df_fem <- rbind(x, y) %>%
dplyr::filter(sex == "female")
p_male_pooled <- ggplot(df_male, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 males") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Males of F1 male (1/1) x female (0/0)",
subtitle = paste0("Pooled sites = ", df_male %>% dplyr::filter(gt == "0/0") %>% dplyr::filter(type == "obs") %>% summarise(sum(total)) %>% as.numeric())) +
scale_x_discrete("type", labels = c("Expected","Observed")) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
p_female_pooled <- ggplot(df_fem, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 females") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 females of F1 male (1/1) x female (0/0)",
subtitle = paste0("Pooled sites = ", df_fem %>% dplyr::filter(gt == "0/0") %>% dplyr::filter(type == "obs") %>% summarise(sum(total)) %>% as.numeric()))+
scale_x_discrete("type", labels = c("Expected","Observed")) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
grid.arrange(p_female_pooled, p_male_pooled, ncol = 2)
ggplot(df_male, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 siblings") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Males of F1 male (1/1) x female (0/0)") +
# geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
geom_text(data = filter(df_male, gt == "0/0"), aes(y=prop, x=type, label=total), vjust = 0) +
facet_wrap(~ sample) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
ggplot(df_fem, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 siblings") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Females of F1 male (1/1) x female (0/0)") +
# geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
geom_text(data = filter(df_fem, gt == "0/0"), aes(y=prop, x=type, label=total), vjust = 0) +
facet_wrap(~ sample) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
# set the families (include only families with at least one adult F2)
family = grep("grndat|grnson",gt$ID, value=TRUE) %>%
str_extract("[^_]+") %>%
unique()
# define a list to put all the data frames in
obs <- list()
for (fam in family) {
obs[[fam]] <- table %>%
dplyr::select(starts_with(fam)) %>%
dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/0")) %>%
dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/1")) %>%
dplyr::select(contains("grn")) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rename(sample = name, gt = value) %>%
dplyr::count(sample, gt, .drop = FALSE) %>%
dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
mutate(n = as.numeric(n)) %>%
group_by(sample) %>%
mutate(total = as.numeric(sum(n))) %>%
dplyr::rename(obs = n) %>%
mutate(sex = case_when(
grepl("son", sample) ~ "male",
grepl("dat", sample) ~ "female"))
}
# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs)
# state the expected site proportion of each genotype and sex under a perfect mendelian segregation of F2
fem_exp <- data.frame(sex = "female", gt = c("0/0", "0/1", "1/1") , prop = c(0.5, 0.5,0))
male_exp <- data.frame(sex = "male",gt = c("0/0", "0/1", "1/1"),prop = c(0.5, 0,0.5))
mendel <- bind_rows(fem_exp,male_exp) %>% mutate(sex = as.character(sex))%>% mutate(gt = as.character(gt))
# mutate(type = "exp")
# using on the total genotypes count of sites per sample, calculate the expected counts, and add them to the observed counts into a final dat table
dat <- observed %>%
#mutate(sample = as.character(sample)) %>%
mutate(gt = as.character(gt)) %>%
left_join(mendel, by = c("gt", "sex")) %>%
mutate(exp = total*prop)
x <- dat %>%
dplyr::select(c("sample","gt","sex", "exp","prop", "total")) %>%
mutate(type = "exp") %>%
dplyr::rename(count = exp)
y <- dat %>%
dplyr::select(c("sample","gt","sex", "obs","total")) %>%
mutate(type = "obs") %>%
dplyr::rename(count = obs) %>%
mutate(prop = (count/total))
df_male <- rbind(x, y) %>%
dplyr::filter(sex == "male")
df_fem <- rbind(x, y) %>%
dplyr::filter(sex == "female")
p_male_pooled <- ggplot(df_male, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 males") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Males of F1 male (0/0) x female (0/1)",
subtitle = paste0("Pooled sites = ", df_male %>% dplyr::filter(gt == "0/0") %>% dplyr::filter(type == "obs") %>% summarise(sum(total)) %>% as.numeric())) +
scale_x_discrete("type", labels = c("Expected","Observed")) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
p_female_pooled <- ggplot(df_fem, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 females") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 females of F1 male (0/0) x female (0/1)",
subtitle = paste0("Pooled sites = ", df_fem %>% dplyr::filter(gt == "0/0") %>% dplyr::filter(type == "obs") %>% summarise(sum(total)) %>% as.numeric())) +
scale_x_discrete("type", labels = c("Expected","Observed")) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
grid.arrange(p_female_pooled, p_male_pooled, ncol = 2)
ggplot(df_male, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 siblings") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Males of F1 male (0/0) x female (0/1)") +
# geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
geom_text(data = filter(df_male, gt == "0/0"), aes(y=prop, x=type, label=total), vjust = 0) +
facet_wrap(~ sample) +
labs(fill = "Genotype") +
theme_classic()
ggplot(df_fem, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 siblings") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Females of F1 male (0/0) x female (0/1)") +
# geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
geom_text(data = filter(df_fem, gt == "0/0"), aes(y=prop, x=type, label=total), vjust = 0) +
facet_wrap(~ sample) +
labs(fill = "Genotype") +
theme_classic()
# set the families (include only families with at least one adult F2)
family = grep("grndat|grnson",gt$ID, value=TRUE) %>%
str_extract("[^_]+") %>%
unique()
# define a list to put all the data frames in
obs <- list()
for (fam in family) {
obs[[fam]] <- table %>%
dplyr::select(starts_with(fam)) %>%
dplyr::filter_at(vars(matches("_son")), all_vars(. == "1/1")) %>%
dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/1")) %>%
dplyr::select(contains("grn")) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rename(sample = name, gt = value) %>%
dplyr::count(sample, gt, .drop = FALSE) %>%
dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
mutate(n = as.numeric(n)) %>%
group_by(sample) %>%
mutate(total = as.numeric(sum(n))) %>%
dplyr::rename(obs = n) %>%
mutate(sex = case_when(
grepl("son", sample) ~ "male",
grepl("dat", sample) ~ "female"))
}
# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs)
# state the expected site proportion of each genotype and sex under a perfect mendelian segregation of F2
fem_exp <- data.frame(sex = "female", gt = c("0/0", "0/1", "1/1") , prop = c(0,0.5,1))
male_exp <- data.frame(sex = "male",gt = c("0/0", "0/1", "1/1"),prop = c(0.5,0,0.5))
mendel <- bind_rows(fem_exp,male_exp) %>% mutate(sex = as.character(sex))%>% mutate(gt = as.character(gt))
# mutate(type = "exp")
# using on the total genotypes count of sites per sample, calculate the expected counts, and add them to the observed counts into a final dat table
dat <- observed %>%
#mutate(sample = as.character(sample)) %>%
mutate(gt = as.character(gt)) %>%
left_join(mendel, by = c("gt", "sex")) %>%
mutate(exp = total*prop)
x <- dat %>%
dplyr::select(c("sample","gt","sex", "exp","prop", "total")) %>%
mutate(type = "exp") %>%
dplyr::rename(count = exp)
y <- dat %>%
dplyr::select(c("sample","gt","sex", "obs","total")) %>%
mutate(type = "obs") %>%
dplyr::rename(count = obs) %>%
mutate(prop = (count/total))
df_male <- rbind(x, y) %>%
dplyr::filter(sex == "male")
df_fem <- rbind(x, y) %>%
dplyr::filter(sex == "female")
p_male_pooled <- ggplot(df_male, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 males") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Males of F1 male (1/1) x female (0/1)",
subtitle = paste0("Pooled sites = ", df_male %>% dplyr::filter(gt == "0/0") %>% dplyr::filter(type == "obs") %>% summarise(sum(total)) %>% as.numeric())) +
scale_x_discrete("type", labels = c("Expected","Observed")) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
p_female_pooled <- ggplot(df_fem, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 females") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 females of F1 male (1/1) x female (0/1)",
subtitle = paste0("Pooled sites = ", df_fem %>% dplyr::filter(gt == "0/0") %>% dplyr::filter(type == "obs") %>% summarise(sum(total)) %>% as.numeric())) +
scale_x_discrete("type", labels = c("Expected","Observed")) +
labs(fill = "Genotype") +
theme_classic() +
theme(axis.text.x = element_text(size=15),
axis.title.x = element_blank())
grid.arrange(p_female_pooled, p_male_pooled, ncol = 2)
ggplot(df_male, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 siblings") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Males of F1 male (1/1) x female (0/1)") +
# geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
geom_text(data = filter(df_male, gt == "0/0"), aes(y=prop, x=type, label=total), vjust = 0) +
facet_wrap(~ sample) +
labs(fill = "Genotype") +
theme_classic()
ggplot(df_fem, aes(fill=gt, y=prop, x=type)) +
geom_bar(position="fill", stat="identity", ) +
xlab("F2 siblings") +
ylab("Proportion of F2 genotype") +
labs(title = "F2 Females of F1 male (1/1) x female (0/1)") +
# geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
geom_text(data = filter(df_fem, gt == "0/0"), aes(y=prop, x=type, label=total), vjust = 0) +
facet_wrap(~ sample) +
labs(fill = "Genotype") +
theme_classic()
The former crosses of heterozygotic females (5 and 6) show that F2 males can be heterozygotic and carry two alleles, like their mother.
But are these sites real? and if they are, can they be transmitted to their daughters?
Looking at the F2 offspring of crossing heterozygotic F1 males with females, we tested 3 possible predictions:
# set the families (include only families with at least one adult F2)
family = grep("grndat|grnson",gt$ID, value=TRUE) %>%
str_extract("[^_]+") %>%
unique()
# define a list to put all the data frames in
obs <- list()
for (fam in family) {
obs[[fam]] <- table %>%
dplyr::select(starts_with(fam)) %>%
dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/1")) %>%
dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/0")) %>%
dplyr::select(contains("grn")) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rename(sample = name, gt = value) %>%
dplyr::count(sample, gt, .drop = FALSE) %>%
dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
mutate(n = as.numeric(n)) %>%
group_by(sample) %>%
mutate(total = as.numeric(sum(n))) %>%
dplyr::rename(obs = n) %>%
mutate(sex = case_when(
grepl("son", sample) ~ "male",
grepl("dat", sample) ~ "female"))
}
# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs)
# state the expected site proportion of each genotype and sex under the different hypothesis:
# Heterozygotic sites are real:Males can carry and inherit two alleles
fem_sitesReal <- data.frame(type = "sitesReal", sex = "female", gt = c("0/0", "0/1", "1/1") , prop = c(0.5, 0.5, 0))
# Heterozygotic sites are false:Males can carry and inherit only one allele
# the heterozygotic sites are actually (0)
fem_sitesFalse_0 <- data.frame(type = "sitesFalse_0",sex = "female", gt = c("0/0", "0/1", "1/1") , prop = c(1,0,0))
# the heterozygotic sites are actually (1)
fem_sitesFalse_1 <- data.frame(type = "sitesFalse_1", sex = "female", gt = c("0/0", "0/1", "1/1") , prop = c(0,1,0))
# males F2 genotype is expected to stay the same , as it is produced from unfertilized egg, hence have no paternal inheritance
male_sitesReal <- data.frame(type = "sitesReal",sex = "male",gt = c("0/0", "0/1", "1/1"),prop = c(1,0,0))
male_sitesFalse_0 <- data.frame(type = "sitesFalse_0",sex = "male",gt = c("0/0", "0/1", "1/1"),prop = c(1,0,0))
male_sitesFalse_1 <- data.frame(type = "sitesFalse_1", sex = "male",gt = c("0/0", "0/1", "1/1"),prop = c(1,0,0))
expected_prop <- bind_rows(fem_sitesReal,fem_sitesFalse_0,fem_sitesFalse_1,male_sitesReal,male_sitesFalse_0,male_sitesFalse_1) %>% mutate(sex = as.character(sex))%>% mutate(gt = as.character(gt))
# mutate(type = "exp")
# using on the total genotypes count of sites per sample, calculate the expected counts, and add them to the observed counts into a final dat table
expected <- observed %>%
mutate(gt = as.character(gt)) %>%
full_join(expected_prop, by = c("gt", "sex")) %>%
mutate(count = total*prop)
x <- observed %>%
dplyr::select(c("sample","sex","gt","total", "obs")) %>%
mutate(type = "Observed") %>%
dplyr::rename(count = obs) %>%
mutate(prop = (count/total))
dat <- bind_rows(x, expected) %>%
dplyr::select(-obs)
# subset for males and females, and reorder the "types" levels:
dat_fem <- dat %>% dplyr::filter(sex == "female")
level_order_fem <- factor(dat_fem$type, level = c("sitesFalse_0", "sitesFalse_1", "sitesReal", "Observed"))
dat_male <- dat %>% dplyr::filter(sex == "male")
level_order_male <- factor(dat_male$type, level = c("sitesFalse_0", "sitesFalse_1", "sitesReal", "Observed"))
p_male_pooled <- ggplot(dat_male, aes(fill=gt, y=prop, x=level_order_male)) +
geom_bar(position="fill", stat="identity", width = 0.8) +
ylab("Proportion of F2 genotype") +
labs(title = paste0("F2 Male, pooled sites = ", dat_male %>% dplyr::filter(type == "Observed") %>% summarise(sum(count)) %>% as.numeric())) +
labs(fill = "Genotype:") +
theme_classic() +
theme(axis.text.x = element_text(size=12, angle = 45,hjust = 1),
axis.title.x = element_blank())
p_female_pooled <- ggplot(dat_fem, aes(fill=gt, y=prop, x=level_order_fem)) +
geom_bar(position="fill", stat="identity", width = 0.8) +
ylab("Proportion of F2 genotype") +
labs(title = paste0("F2 Female, pooled sites = ", dat_fem %>% dplyr::filter(type == "Observed") %>% summarise(sum(count)) %>% as.numeric())) +
labs(fill = "Genotype:") +
theme_classic() +
theme(axis.text.x = element_text(size=12, angle = 45,hjust = 1),
axis.title.x = element_blank())
combined <- p_female_pooled + p_male_pooled & theme(legend.position = "bottom")
combined + plot_layout(guides = "collect") + plot_annotation(
title = 'F2 genotype. F1 Cross: male (0/1) x female (0/0)')
# plot each individual:
ggplot(dat_male, aes(fill=gt, y=prop, x=level_order_male)) +
geom_bar(position="fill", stat="identity", ) +
ylab("Proportion of F2 genotype") +
labs(title = "F2 male genotype. F1 Cross: male (0/1) x female (0/0)", fill = "Genotype:") +
geom_text(data = dat_male %>% filter(gt == "0/1") %>% filter(type == "Observed"), aes(x = 4, y = 0.8, label = total), inherit.aes = FALSE) +
facet_wrap(~ sample) +
theme_classic()+
theme(legend.position = "bottom", axis.title.x = element_blank(), axis.text.x = element_text(size=8, angle = 45,hjust = 1))
ggplot(dat_fem, aes(fill=gt, y=prop, x=level_order_fem)) +
geom_bar(position="fill", stat="identity", ) +
ylab("Proportion of F2 genotype") +
labs(title = "F2 female genotype. F1 Cross: male (0/1) x female (0/0)", fill = "Genotype:") +
geom_text(data = dat_fem %>% filter(gt == "0/1") %>% filter(type == "Observed"), aes(x = 4, y = 0.8, label = total), inherit.aes = FALSE) +
facet_wrap(~ sample) +
theme_classic()+
theme(legend.position = "bottom", axis.title.x = element_blank(), axis.text.x = element_text(size=8, angle = 45,hjust = 1))
# set the families (include only families with at least one adult F2)
family = grep("grndat|grnson",gt$ID, value=TRUE) %>%
str_extract("[^_]+") %>%
unique()
# define a list to put all the data frames in
obs <- list()
for (fam in family) {
obs[[fam]] <- table %>%
dplyr::select(starts_with(fam)) %>%
dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/1")) %>%
dplyr::filter_at(vars(matches("_dat")), all_vars(. == "1/1")) %>%
dplyr::select(contains("grn")) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rename(sample = name, gt = value) %>%
dplyr::count(sample, gt, .drop = FALSE) %>%
dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
mutate(n = as.numeric(n)) %>%
group_by(sample) %>%
mutate(total = as.numeric(sum(n))) %>%
dplyr::rename(obs = n) %>%
mutate(sex = case_when(
grepl("son", sample) ~ "male",
grepl("dat", sample) ~ "female"))
}
# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs)
# state the expected site proportion of each genotype and sex under the different hypothesis:
# Heterozygotic sites are real:Males can carry and inherit two alleles
fem_sitesReal <- data.frame(type = "sitesReal", sex = "female", gt = c("0/0", "0/1", "1/1") , prop = c(0,0.5,0.5))
# Heterozygotic sites are false:Males can carry and inherit only one allele
# the heterozygotic sites are actually (0)
fem_sitesFalse_0 <- data.frame(type = "sitesFalse_0",sex = "female", gt = c("0/0", "0/1", "1/1") , prop = c(0,1,0))
# the heterozygotic sites are actually (1)
fem_sitesFalse_1 <- data.frame(type = "sitesFalse_1", sex = "female", gt = c("0/0", "0/1", "1/1") , prop = c(0,0,1))
# males F2 genotype is expected to stay the same , as it is produced from unfertilized egg, hence have no paternal inheritance
male_sitesReal <- data.frame(type = "sitesReal",sex = "male",gt = c("0/0", "0/1", "1/1"),prop = c(0,0,1))
male_sitesFalse_0 <- data.frame(type = "sitesFalse_0",sex = "male",gt = c("0/0", "0/1", "1/1"),prop = c(0,0,1))
male_sitesFalse_1 <- data.frame(type = "sitesFalse_1", sex = "male",gt = c("0/0", "0/1", "1/1"),prop = c(0,0,1))
expected_prop <- bind_rows(fem_sitesReal,fem_sitesFalse_0,fem_sitesFalse_1,male_sitesReal,male_sitesFalse_0,male_sitesFalse_1) %>% mutate(sex = as.character(sex))%>% mutate(gt = as.character(gt))
# mutate(type = "exp")
# using on the total genotypes count of sites per sample, calculate the expected counts, and add them to the observed counts into a final dat table
expected <- observed %>%
mutate(gt = as.character(gt)) %>%
full_join(expected_prop, by = c("gt", "sex")) %>%
mutate(count = total*prop)
x <- observed %>%
dplyr::select(c("sample","sex","gt","total", "obs")) %>%
mutate(type = "Observed") %>%
dplyr::rename(count = obs) %>%
mutate(prop = (count/total))
dat <- bind_rows(x, expected) %>%
dplyr::select(-obs)
# subset for males and females, and reorder the "types" levels:
dat_fem <- dat %>% dplyr::filter(sex == "female")
level_order_fem <- factor(dat_fem$type, level = c("sitesFalse_0", "sitesFalse_1", "sitesReal", "Observed"))
dat_male <- dat %>% dplyr::filter(sex == "male")
level_order_male <- factor(dat_male$type, level = c("sitesFalse_0", "sitesFalse_1", "sitesReal", "Observed"))
p_male_pooled <- ggplot(dat_male, aes(fill=gt, y=prop, x=level_order_male)) +
geom_bar(position="fill", stat="identity", width = 0.8) +
ylab("Proportion of F2 genotype") +
labs(title = paste0("F2 Male, pooled sites = ", dat_male %>% dplyr::filter(type == "Observed") %>% summarise(sum(count)) %>% as.numeric())) +
labs(fill = "Genotype:") +
theme_classic() +
theme(axis.text.x = element_text(size=12, angle = 45,hjust = 1),
axis.title.x = element_blank())
p_female_pooled <- ggplot(dat_fem, aes(fill=gt, y=prop, x=level_order_fem)) +
geom_bar(position="fill", stat="identity", width = 0.8) +
ylab("Proportion of F2 genotype") +
labs(title = paste0("F2 Female, pooled sites = ", dat_fem %>% dplyr::filter(type == "Observed") %>% summarise(sum(count)) %>% as.numeric())) +
labs(fill = "Genotype:") +
theme_classic() +
theme(axis.text.x = element_text(size=12, angle = 45,hjust = 1),
axis.title.x = element_blank())
combined <- p_female_pooled + p_male_pooled & theme(legend.position = "bottom")
combined + plot_layout(guides = "collect") + plot_annotation(
title = 'F2 genotype. F1 Cross: male (0/1) x female (1/1)')
# plot each individual:
ggplot(dat_male, aes(fill=gt, y=prop, x=level_order_male)) +
geom_bar(position="fill", stat="identity", ) +
ylab("Proportion of F2 genotype") +
labs(title = "F2 male genotype. F1 Cross: male (0/1) x female (1/1)", fill = "Genotype:") +
geom_text(data = dat_male %>% filter(gt == "0/1") %>% filter(type == "Observed"), aes(x = 4, y = 0.8, label = total), inherit.aes = FALSE) +
facet_wrap(~ sample) +
theme_classic()+
theme(legend.position = "bottom", axis.title.x = element_blank(), axis.text.x = element_text(size=8, angle = 45,hjust = 1))
ggplot(dat_fem, aes(fill=gt, y=prop, x=level_order_fem)) +
geom_bar(position="fill", stat="identity", ) +
ylab("Proportion of F2 genotype") +
labs(title = "F2 female genotype. F1 Cross: male (0/1) x female (1/1)", fill = "Genotype:") +
geom_text(data = dat_fem %>% filter(gt == "0/1") %>% filter(type == "Observed"), aes(x = 4, y = 0.8, label = total), inherit.aes = FALSE) +
facet_wrap(~ sample) +
theme_classic()+
theme(legend.position = "bottom", axis.title.x = element_blank(), axis.text.x = element_text(size=8, angle = 45,hjust = 1))
# set the families (include only families with at least one adult F2)
family = grep("grndat|grnson",gt$ID, value=TRUE) %>%
str_extract("[^_]+") %>%
unique()
# define a list to put all the data frames in
obs <- list()
for (fam in family) {
obs[[fam]] <- table %>%
dplyr::select(starts_with(fam)) %>%
dplyr::filter_at(vars(matches("_son")), all_vars(. == "0/1")) %>%
dplyr::filter_at(vars(matches("_dat")), all_vars(. == "0/1")) %>%
dplyr::select(contains("grn")) %>%
tidyr::pivot_longer(everything()) %>%
dplyr::rename(sample = name, gt = value) %>%
dplyr::count(sample, gt, .drop = FALSE) %>%
dplyr::filter(gt %in% c("0/0", "1/1", "0/1")) %>%
mutate(n = as.numeric(n)) %>%
group_by(sample) %>%
mutate(total = as.numeric(sum(n))) %>%
dplyr::rename(obs = n) %>%
mutate(sex = case_when(
grepl("son", sample) ~ "male",
grepl("dat", sample) ~ "female"))
}
# bind all families together, to a final data frame containing all observed counts
observed <- do.call("rbind", obs)
# state the expected site proportion of each genotype and sex under the different hypothesis:
# Heterozygotic sites are real:Males can carry and inherit two alleles
fem_sitesReal <- data.frame(type = "sitesReal", sex = "female", gt = c("0/0", "0/1", "1/1") , prop = c(0.25, 0.5, 0.25))
# Heterozygotic sites are false:Males can carry and inherit only one allele
# the heterozygotic sites are actually (0)
fem_sitesFalse_0 <- data.frame(type = "sitesFalse_0",sex = "female", gt = c("0/0", "0/1", "1/1") , prop = c(0.5, 0.5, 0))
# the heterozygotic sites are actually (1)
fem_sitesFalse_1 <- data.frame(type = "sitesFalse_1", sex = "female", gt = c("0/0", "0/1", "1/1") , prop = c(0, 0.5, 0.5))
# males F2 genotype is expected to stay the same , as it is produced from unfertilized egg, hence have no paternal inheritance
male_sitesReal <- data.frame(type = "sitesReal",sex = "male",gt = c("0/0", "0/1", "1/1"),prop = c(0.5, 0, 0.5))
male_sitesFalse_0 <- data.frame(type = "sitesFalse_0",sex = "male",gt = c("0/0", "0/1", "1/1"),prop = c(0.5, 0, 0.5))
male_sitesFalse_1 <- data.frame(type = "sitesFalse_1", sex = "male",gt = c("0/0", "0/1", "1/1"),prop = c(0.5, 0, 0.5))
expected_prop <- bind_rows(fem_sitesReal,fem_sitesFalse_0,fem_sitesFalse_1,male_sitesReal,male_sitesFalse_0,male_sitesFalse_1) %>% mutate(sex = as.character(sex))%>% mutate(gt = as.character(gt))
# mutate(type = "exp")
# using on the total genotypes count of sites per sample, calculate the expected counts, and add them to the observed counts into a final dat table
expected <- observed %>%
mutate(gt = as.character(gt)) %>%
full_join(expected_prop, by = c("gt", "sex")) %>%
mutate(count = total*prop)
x <- observed %>%
dplyr::select(c("sample","sex","gt","total", "obs")) %>%
mutate(type = "Observed") %>%
dplyr::rename(count = obs) %>%
mutate(prop = (count/total))
dat <- bind_rows(x, expected) %>%
dplyr::select(-obs)
# subset for males and females, and reorder the "types" levels:
dat_fem <- dat %>% dplyr::filter(sex == "female")
level_order_fem <- factor(dat_fem$type, level = c("sitesFalse_0", "sitesFalse_1", "sitesReal", "Observed"))
dat_male <- dat %>% dplyr::filter(sex == "male")
level_order_male <- factor(dat_male$type, level = c("sitesFalse_0", "sitesFalse_1", "sitesReal", "Observed"))
p_male_pooled <- ggplot(dat_male, aes(fill=gt, y=prop, x=level_order_male)) +
geom_bar(position="fill", stat="identity", width = 0.8) +
ylab("Proportion of F2 genotype") +
labs(title = paste0("F2 Male, pooled sites = ", dat_male %>% dplyr::filter(type == "Observed") %>% summarise(sum(count)) %>% as.numeric())) +
labs(fill = "Genotype:") +
theme_classic() +
theme(axis.text.x = element_text(size=12, angle = 45,hjust = 1),
axis.title.x = element_blank())
p_female_pooled <- ggplot(dat_fem, aes(fill=gt, y=prop, x=level_order_fem)) +
geom_bar(position="fill", stat="identity", width = 0.8) +
ylab("Proportion of F2 genotype") +
labs(title = paste0("F2 Female, pooled sites = ", dat_fem %>% dplyr::filter(type == "Observed") %>% summarise(sum(count)) %>% as.numeric())) +
labs(fill = "Genotype:") +
theme_classic() +
theme(axis.text.x = element_text(size=12, angle = 45,hjust = 1),
axis.title.x = element_blank())
combined <- p_female_pooled + p_male_pooled & theme(legend.position = "bottom")
combined + plot_layout(guides = "collect") + plot_annotation(
title = 'F2 genotype. F1 Cross: male (0/1) x female (0/1)')
# plot each individual:
ggplot(dat_male, aes(fill=gt, y=prop, x=level_order_male)) +
geom_bar(position="fill", stat="identity", ) +
ylab("Proportion of F2 genotype") +
labs(title = "F2 male genotype. F1 Cross: male (0/1) x female (0/1)", fill = "Genotype:") +
geom_text(data = dat_male %>% filter(gt == "0/1") %>% filter(type == "Observed"), aes(x = 4, y = 0.8, label = total), inherit.aes = FALSE) +
facet_wrap(~ sample) +
theme_classic()+
theme(legend.position = "bottom", axis.title.x = element_blank(), axis.text.x = element_text(size=8, angle = 45,hjust = 1))
ggplot(dat_fem, aes(fill=gt, y=prop, x=level_order_fem)) +
geom_bar(position="fill", stat="identity", ) +
ylab("Proportion of F2 genotype") +
labs(title = "F2 female genotype. F1 Cross: male (0/1) x female (0/1)", fill = "Genotype:") +
geom_text(data = dat_fem %>% filter(gt == "0/1") %>% filter(type == "Observed"), aes(x = 4, y = 0.8, label = total), inherit.aes = FALSE) +
facet_wrap(~ sample) +
theme_classic()+
theme(legend.position = "bottom", axis.title.x = element_blank(), axis.text.x = element_text(size=8, angle = 45,hjust = 1))
We did 2 types of a-parametric tests for independence, testing the predicted counts against the observed counts: